/*

04_did_plots_nolines.do

Purpose: create treatment balance and outcomes plots
Inputs: student-level-panel
Outputs: all figures except binscatter_sim_vs_real_risk
	
*/

// Pre-treatment covariates used as dependent variables in the balance figures
global pretreatment "female black hispanic"

// Right-hand-side specifications: RDD terms and two sets of controls
global rdd_vars "above app_risk c.app_risk#1.above"
global full_controls "i.block hs#resident k#resident prek3#resident prek4#resident"
global part_controls "i.block"

// Load the student-level panel from the intermediate-data folder
use "$int/student-level-panel", clear

// Sample restriction: RCT-eligible grades (years 2019 and 2020)
keep if (simulator_eligible == 1) & (any_baseline == 1)
// Remove 2020 applicants in eligible grades with no treatment assignment
drop if (year == 2020) & missing(treat)

// Rebuild the risk bin: left endpoint of the 0.1-wide bin containing app_risk.
// egen cut leaves app_risk == 1 unbinned, so assign it to the top bin.
capture drop riskq
egen riskq = cut(app_risk), at(0(0.1)1)
replace riskq = 0.9 if missing(riskq) & (app_risk == 1)

// Treatment-arm indicators
generate control = (treat == 0)
label variable control "Control Group"
generate sim_treatment = (treat == 1)
label variable sim_treatment "Treatment Simulator-Only"
generate warning_treatment = (treat == 3)
label variable warning_treatment "Treatment Warning"

// Year indicators
generate pre = (year == 2019)
label variable pre "2019"
generate post = (year == 2020)

// Risk-range flags. These use the raw (uncentered) app_risk, so they must be
// created before the running variable is recentered below.
generate samp_cond = (app_risk > 0.01 & app_risk < 0.99) if !missing(app_risk)
generate p1 = (app_risk <= 0.01) if !missing(app_risk)
generate p99 = (app_risk >= 0.99) if !missing(app_risk)

// Flag equal to 1 when hs == 0 or resident == 0
generate excl_hsresident = (hs == 0 | resident == 0)

// Recenter the running variable so the 0.5 cutoff sits at 0
replace app_risk = app_risk - 0.5
generate above = (app_risk >= 0) if !missing(app_risk)
	
// Binned scatter figures (no fitted lines) by risk score for every variable in
// $pretreatment and $outcomes_sec_stage_figure.
//   - Variables listed in $pretreatment: 2019 vs 2020 figure, y axis fixed to 0-1.
//   - All other variables: 2019 vs 2020, with 2020 split by email_received and
//     by side of the 0.5 cutoff; y axis derived from the binned means.
// Each iteration works on a preserved copy restricted to excl_hsresident == 1
// and non-missing app_risk, and restores the full panel afterwards.
foreach var in $pretreatment $outcomes_sec_stage_figure {
	preserve
		keep if excl_hsresident == 1
		keep if !mi(app_risk)

		// y-axis title is the variable label. A fixed local name is used so that
		// long variable names cannot exceed the local-macro name length limit.
		local ytitle_text : variable label `var'

		// Exact (whole-token) membership in $pretreatment. A substring test
		// would wrongly match names such as k, which occurs inside "black".
		local pretreatment_vars $pretreatment
		local is_pretreatment : list var in pretreatment_vars

		foreach time in pre post {

			// Local linear RDD fit within the year, trimmed to samp_cond == 1
			qui reg `var' above app_risk c.app_risk#1.above ///
				if samp_cond == 1 & `time' == 1 , r
			est sto e_`var'

			// Fitted values below the cutoff
			gen pred_`var'_`time'_b = _b[_cons] + _b[app_risk]* app_risk ///
				if riskq < 0.5 & `time' == 1 

			// Fitted values above the cutoff
			gen pred_`var'_`time'_a = _b[_cons] + _b[above] +  ///
				_b[app_risk]* app_risk + _b[c.app_risk#1.above] * app_risk ///
					if riskq >= 0.5 & `time' == 1 

		} // time

		// Undo the centering so the x axis runs from 0 to 1
		replace app_risk = app_risk + 0.5
		replace email_received = 1 if warning_treatment==1
		replace email_received = . if year == 2019
		// Duplicate the 2020 rows; the copies get email_received missing so the
		// collapse also yields a pooled 2020 series next to the by-email series
		expand 2 if year == 2020, gen(copy)
		replace email_received = . if copy == 1 
		collapse (mean) `var'  app_risk pred_`var'*_b pred_`var'*_a ///
			samp_cond ///
			, by(riskq post email_received p1 p99)

		if `is_pretreatment' {

			// Fixed 0-1 y scale for pre-treatment variables
			local lb = 0
			local ub = 1
			local steps = .2

			twoway ///
				/// Scatter for 2020 (pooled over email status)
				(scatter `var' app_risk if post == 1 & samp_cond == 1 & mi(email_received),  ///
				msymbol(O) mfcol(maroon%40) mlcol(maroon) msize(medlarge)  ) ///
				(scatter `var' app_risk if post == 1 & p1 == 1 & mi(email_received),  ///
				msymbol(O) mfcol(maroon%40) mlcol(maroon) msize(vlarge)  ) ///
				(scatter `var' app_risk if post == 1 & p99 == 1 & mi(email_received),  ///
				msymbol(O) mfcol(maroon%40) mlcol(maroon) msize(vlarge)  ) ///
				/// Scatter for 2019
				(scatter `var' app_risk if post == 0& samp_cond == 1 & mi(email_received),  ///
					msymbol(D) mfcol(navy%80) mlcol(navy) msize(medlarge)  ) ///
				(scatter `var' app_risk if post == 0 & p1 == 1 & mi(email_received),  ///
					msymbol(D) mfcol(navy%80) mlcol(navy) msize(vlarge) ) ///
				(scatter `var' app_risk if post == 0 & p99 == 1 & mi(email_received),  ///
					msymbol(D) mfcol(navy%80) mlcol(navy) msize(vlarge)  ) ///
				, ///
				scheme(s1color) name(g1_`var', replace) ///
				xline(0.5, lpattern(dash) lwidth(medthick) lcol(darknavy)) ///
				ylab(`lb'(`steps')`ub') ysca(range(`lb' `ub'))	///
				xlab(0(0.1) 1) xsca(range(0 1)) ///
				xtitle("Risk score" ) ytitle("`ytitle_text'") ///
				legend(row(1) order(4 "2019"  1 "2020"  )) 

			// NOTE(review): the local macro named version is not defined anywhere
			// in this do-file, so it expands to nothing and the file name ends in
			// "_noline_.png". Left unchanged to keep existing output names; confirm
			// the intended suffix.
			graph export "$figures/did_`var'_by_risk_noline_`version'.png", width(3200) replace

		}
		else {

			// y axis from the range of the binned means: start from 0.01
			// precision and coarsen to 0.05 / 0.1 / 0.2 as the tick step grows
			qui su `var'

			local lb = round(`r(min)',0.01)
			local ub = round(`r(max)',0.01)
			local steps = round( 0.01 + (`ub' - `lb') / 10 ,0.01)

			if `steps' > 0.05 {
				local lb = round(`lb',0.05)
				local ub = round(`ub',0.05)
				local steps = round(`steps',0.05)
			}

			if `steps' > 0.1 {
				local lb = round(`lb',0.1)
				local ub = round(`ub',0.1)
				local steps = round(`steps',0.1)
			}

			if `steps' > 0.2 {
				local lb = round(`lb',0.2)
				local ub = round(`ub',0.2)
				local steps = round(`steps',0.2)
			}

			// Version that splits 2020 by email received and side of the cutoff.
			// The collapse dropped the original indicator, so rebuild it from the
			// bin-mean (uncentered) risk score.
			gen above = app_risk >= .5 if !mi(app_risk)

			twoway ///
				/// 2020 -- no email
				(scatter `var' app_risk if post == 1 & samp_cond == 1 & email_received == 0,  ///
				msymbol(X) mlcol(maroon) msize(medlarge)  ) ///
				(scatter `var' app_risk if post == 1 & p1 == 1 & email_received == 0,  ///
				msymbol(X) mlcol(maroon) msize(vlarge)  ) ///
				(scatter `var' app_risk if post == 1 & p99 == 1 & email_received == 0,  ///
				msymbol(X) mlcol(maroon) msize(vlarge)  ) ///
				/// 2020 Received RCT Email (below)
				(scatter `var' app_risk if post == 1 & samp_cond == 1 & email_received == 1 & above == 0,  ///
				msymbol(O) mfcol(none) mlcol(maroon) msize(medlarge)  ) ///
				(scatter `var' app_risk if post == 1 & p1 == 1 & email_received == 1 & above == 0,  ///
				msymbol(O) mfcol(none) mlcol(maroon) msize(vlarge)  ) ///
				(scatter `var' app_risk if post == 1 & p99 == 1 & email_received == 1 & above == 0,  ///
				msymbol(O) mfcol(none) mlcol(maroon) msize(vlarge)  ) ///
				/// 2020 Received Warnings Email (above)
				(scatter `var' app_risk if post == 1 & samp_cond == 1 & email_received == 1 & above == 1,  ///
				msymbol(O) mfcol(maroon%40) mlcol(maroon) msize(medlarge)  ) ///
				(scatter `var' app_risk if post == 1 & p1 == 1 & email_received == 1 & above == 1,  ///
				msymbol(O) mfcol(maroon%40) mlcol(maroon) msize(vlarge)  ) ///
				(scatter `var' app_risk if post == 1 & p99 == 1 & email_received == 1 & above == 1,  ///
				msymbol(O) mfcol(maroon%40) mlcol(maroon) msize(vlarge)  ) ///
				/// Scatter for 2019
				(scatter `var' app_risk if post == 0& samp_cond == 1,  ///
					msymbol(D) mfcol(navy%80) mlcol(navy) msize(medlarge)  ) ///
				(scatter `var' app_risk if post == 0 & p1 == 1,  ///
					msymbol(D) mfcol(navy%80) mlcol(navy) msize(vlarge) ) ///
				(scatter `var' app_risk if post == 0 & p99 == 1,  ///
					msymbol(D) mfcol(navy%80) mlcol(navy) msize(vlarge)  ) ///
				, ///
				scheme(s1color) name(g1_`var', replace) ///
				xline(0.5, lpattern(dash) lwidth(medthick) lcol(darknavy)) ///
				ylab(`lb'(`steps')`ub') ysca(range(`lb' `ub'))	///
				xlab(0(0.1) 1) xsca(range(0 1)) ///
				xtitle("Risk score" ) ytitle("`ytitle_text'") ///
				legend(row(2) order(10 "2019"  7 "2020, smart warnings" ///
					4 "2020, encouragement nudge" 1 "2020, no contact" )) 

			graph export "$figures/did_`var'_by_risk_noline_excl_hsresident_by_email.png", width(3200) replace

		}
	restore
} // var

graph drop _all

// RDD figures for first-stage outcomes: binned 2020 means of each variable in
// $outcomes_first_stage by risk score, restricted to excl_hsresident == 1.
// Each iteration works on a preserved copy and restores the full panel afterwards.
foreach var in $outcomes_first_stage {
	preserve
		keep if excl_hsresident == 1
		// Same restriction as the DiD loop: without it, rows with missing
		// app_risk collapse into an unplotted bin that still enters the
		// summarize call used to set the y axis.
		keep if !mi(app_risk)

		// y-axis title is the variable label. A fixed local name is used so that
		// long variable names cannot exceed the local-macro name length limit.
		local ytitle_text : variable label `var'

		// Exact (whole-token) membership in $pretreatment. A substring test
		// would wrongly match names such as k, which occurs inside "black".
		local pretreatment_vars $pretreatment
		local is_pretreatment : list var in pretreatment_vars

		// Local linear RDD fit on the 2020 sample, trimmed to samp_cond == 1
		qui reg `var' above app_risk c.app_risk#1.above ///
			if samp_cond == 1 & post == 1 , r
		est sto e_`var'

		// Fitted values below the cutoff
		gen pred_`var'_post_b = _b[_cons] + _b[app_risk]* app_risk ///
			if riskq < 0.5 & post == 1 

		// Fitted values above the cutoff
		gen pred_`var'_post_a = _b[_cons] + _b[above] +  ///
			_b[app_risk]* app_risk + _b[c.app_risk#1.above] * app_risk ///
				if riskq >= 0.5 & post == 1 

		// Undo the centering so the x axis runs from 0 to 1
		replace app_risk = app_risk + 0.5

		collapse (mean) `var'  app_risk pred_`var'*_b pred_`var'*_a ///
			samp_cond ///
			, by(riskq post p1 p99)

		if `is_pretreatment' {
			// Fixed 0-1 y scale for pre-treatment variables
			local lb = 0
			local ub = 1
			local steps = .2
		}
		else {
			// y axis from the range of the 2020 binned means: start from 0.01
			// precision and coarsen to 0.05 / 0.1 / 0.2 as the tick step grows
			qui su `var' if post == 1

			local lb = round(`r(min)',0.01)
			local ub = round(`r(max)',0.01)
			local steps = round( 0.01 + (`ub' - `lb') / 10 ,0.01)

			if `steps' > 0.05 {
				local lb = round(`lb',0.05)
				local ub = round(`ub',0.05)
				local steps = round(`steps',0.05)
			}

			if `steps' > 0.1 {
				local lb = round(`lb',0.1)
				local ub = round(`ub',0.1)
				local steps = round(`steps',0.1)
			}

			if `steps' > 0.2 {
				local lb = round(`lb',0.2)
				local ub = round(`ub',0.2)
				local steps = round(`steps',0.2)
			}
		}

		twoway ///
			/// Scatter for 2020
			(scatter `var' app_risk if post == 1 & samp_cond == 1,  ///
			msymbol(O) mfcol(navy%40) mlcol(navy) msize(medlarge) mcol(navy) ) ///
			(scatter `var' app_risk if post == 1 & p1 == 1,  ///
			msymbol(O) mfcol(navy%40) mlcol(navy) msize(vlarge) mcol(navy) ) ///
			(scatter `var' app_risk if post == 1 & p99 == 1,  ///
			msymbol(O) mfcol(navy%40) mlcol(navy) msize(vlarge) mcol(navy) ) ///
			, ///
			scheme(s1color) name(g1_`var', replace) ///
			xline(0.5, lpattern(dash) lwidth(medthick) lcol(darknavy)) ///
			ylab(`lb'(`steps')`ub') ysca(range(`lb' `ub'))	///
			xlab(0(0.1) 1) xsca(range(0 1)) ///
			xtitle("Risk score" ) ytitle("`ytitle_text'") ///
			legend(row(1) order( 1 "2020")) 

		graph export "$figures/rdd_`var'_by_risk_noline_excl_hsresident.png", width(3200) replace
	restore
} // var

graph drop _all